package com.curry.apiprovider.crawler.pageProcessor.job;

import com.alibaba.fastjson.JSON;
import com.curry.apiprovider.crawler.Constants;
import com.curry.livehelper.entity.domain.TJobInfo;
import org.springframework.beans.factory.annotation.Autowired;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Selectable;

import java.util.List;

/**
 * Page processor for a 51job (前程无忧) job-detail sub page.
 *
 * <p>Each instance is bound to one {@link TJobInfo}. {@link #process(Page)} fills that
 * object with the values scraped from the detail page and publishes it, serialized as
 * JSON, under {@link Constants#KEY_JOB_INFO}.
 */
public class SubJobPageProcessor implements PageProcessor {

    /** Crawl settings: 3 retries, GB2312 page charset, 1000 ms sleep time. */
    private final Site site = Site.me().setRetryTimes(3).setCharset("GB2312").setSleepTime(1000);

    /** The job record that {@link #process(Page)} enriches in place. */
    private final TJobInfo tJobInfo;

    /**
     * @param tJobInfo job record to be completed with the detail-page data
     */
    @Autowired
    public SubJobPageProcessor(TJobInfo tJobInfo) {
        this.tJobInfo = tJobInfo;
    }

    /**
     * Extracts the job details from {@code page} into the bound {@link TJobInfo} and
     * stores its JSON form in the page's result fields.
     *
     * <p>If fewer than four {@code span.sp4} values are found the page is marked as
     * skipped; the remaining fields are still extracted and the result field is still
     * written, as before.
     *
     * @param page the downloaded job-detail page
     */
    @Override
    public void process(Page page) {
        // All extractions start from the same container, so select it once.
        Selectable main = page.getHtml().css("div.tCompanyPage").css("div.tCompany_main");

        List<String> sp4 = main.css("div.t1").xpath("//span[@class='sp4']/text()").all();
        if (sp4 != null && sp4.size() > 3) {
            System.out.println(sp4);
            // Order on the page: required work years, education, number of hires, publish date.
            tJobInfo.setWorkYear(sp4.get(0));
            tJobInfo.setEducation(sp4.get(1));
            tJobInfo.setApplicantNumber(sp4.get(2));
            tJobInfo.setPublishDate(sp4.get(3));
        } else {
            page.setSkip(true);
        }

        // Welfare / benefits tags, stored as a JSON array string.
        List<String> welfareTreatmentInfo = main.xpath("//p[@class='t2']/span/text()").all();
        tJobInfo.setWelfare(JSON.toJSONString(welfareTreatmentInfo));

        List<Selectable> nodes = main.css("div.tBorderTop_box").nodes();
        if (nodes != null) {
            // Each box is read by position; guard every index individually so a page
            // with fewer boxes no longer fails with IndexOutOfBoundsException.
            int boxCount = nodes.size();
            if (boxCount > 1) {
                // Job requirements / description.
                String jobMsg = nodes.get(1).xpath("//div[@class='bmsg job_msg inbox']/text()").get();
                tJobInfo.setJobinformation(jobMsg);
            }
            if (boxCount > 2) {
                // Office address.
                String officeAddress = nodes.get(2).xpath("//div[@class='bmsg inbox']/p/text()").get();
                tJobInfo.setOfficeAddress(officeAddress);
            }
            if (boxCount > 3) {
                // Company information.
                String companyInfo = nodes.get(3).xpath("//div[@class='tmsg inbox']/text()").get();
                tJobInfo.setCompanyInfo(companyInfo);
            }
        }

        page.putField(Constants.KEY_JOB_INFO, JSON.toJSONString(tJobInfo));
    }

    /**
     * @return the crawl settings used for this processor
     */
    @Override
    public Site getSite() {
        return site;
    }
}
